#!/usr/local/bin/python
#-*- encoding:utf-8 -*- 
import string 
from whoosh.index import create_in  
from whoosh.fields import *  
import jieba.analyse
from whoosh.index import open_dir
from whoosh.scoring import *
from whoosh.matching import *
from whoosh.qparser import QueryParser
import utils
import jieba
# Module-level analyzer instance, created once and reused by valid().
analyzer = jieba.analyse.ChineseAnalyzer()
def valid(queryString):
    """Report whether the analyzer produces any token for *queryString*.

    Args:
        queryString: Text to tokenize. A byte string is decoded as UTF-8
            first; already-decoded text is passed to the analyzer as is.

    Returns:
        True if the module-level ``analyzer`` yields at least one token
        for the text, False otherwise.

    Raises:
        UnicodeDecodeError: If a byte string is not valid UTF-8.
    """
    # Only byte strings need decoding. Calling .decode() on text either
    # fails outright (Python 3) or triggers an implicit ASCII encode that
    # breaks on non-ASCII input (Python 2).
    if isinstance(queryString, bytes):
        queryString = queryString.decode('utf-8')
    # The first token is enough to decide; no need to collect them all.
    for _token in analyzer(queryString):
        return True
    return False

def process(raw_app_info, result_app_info,  wasted_app_info, empty_app_info):
    """Split a tab-separated input file into three output files.

    Each input line is stripped and split on tabs, then written unchanged
    to exactly one output:

    * fewer than two fields          -> ``empty_app_info``
    * second field passes ``valid``  -> ``result_app_info``
    * otherwise                      -> ``wasted_app_info``

    Args:
        raw_app_info: Path of the input file to read.
        result_app_info: Path of the output file for accepted lines.
        wasted_app_info: Path of the output file for rejected lines.
        empty_app_info: Path of the output file for lines with fewer
            than two tab-separated fields.

    Raises:
        IOError / OSError: If any of the files cannot be opened.
    """
    # A single ``with`` closes all four files, including the "empty"
    # output, even when an exception interrupts the loop.
    with open(raw_app_info, 'r') as raw_file, \
            open(result_app_info, 'w') as result_file, \
            open(wasted_app_info, 'w') as wasted_file, \
            open(empty_app_info, 'w') as empty_file:
        for line in raw_file:
            fields = line.strip().split('\t')
            if len(fields) < 2:
                empty_file.write(line)
                continue
            if valid(fields[1]):
                result_file.write(line)
            else:
                wasted_file.write(line)

if __name__ == '__main__':
    # Hard-coded input and output paths used when run as a script.
    process(
        raw_app_info='/home/wangshuxin/app_info.mp',
        result_app_info='/home/wangshuxin/app_info_purge.mp',
        wasted_app_info='/home/wangshuxin/app_info_wasted.mp',
        empty_app_info='/home/wangshuxin/app_info_empty.mp',
    )

